"""
@Descriptions: merge several instruction-tuning JSON data files into one file
@Author: Yangjun Wu
@Time: 11/02/2024
"""

import json
import random
import time
from tqdm import tqdm

# Merge several instruction-tuning datasets into a single JSON file.
#
# Each input file is parsed as one JSON document and its items are appended,
# in the order the files are listed, to `new_data`; the combined list is then
# written to `output_file_s` as pretty-printed JSON.

# Input datasets, concatenated in this order.
merge_files = ["../data/train_data/copilot1.05_train.json", "../data/train_data/spider_cot.json"]
# Destination of the merged dataset (overwritten if it already exists).
output_file_s = "../data/train_data/copilot1.1_train.json"
new_data = []
for f in merge_files:
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # default, so non-ASCII text is read identically on every machine.
    with open(f, encoding="utf-8") as f_sample:
        _data = json.load(f_sample)
    print(f"merging data: {len(_data)}")
    # NOTE(review): assumes the top-level JSON value of every input file is a
    # list of samples -- confirm; any other iterable would be extended
    # element-wise (e.g. a dict would contribute only its keys).
    new_data.extend(_data)
print(f"after merge: {len(new_data)}")

# ensure_ascii=False emits raw non-ASCII characters, so the output file must be
# opened with an encoding that can represent them; UTF-8 is pinned explicitly.
# The `with` block closes the file, so no manual close() is needed.
with open(output_file_s, 'w', encoding="utf-8") as merge_sample:
    copilot = json.dumps(new_data, ensure_ascii=False, indent=2)
    merge_sample.write(copilot)
